* Output mean income by specialty used in the choice model
	
	* Load the NRMP specialty-by-applicant counts and retain only the
	* identifiers plus the matched / not-matched count columns.
	use "${intermediate_data}/NRMP/specialty_applicant.dta", clear
	keep year nrmp_spec_desc n_dist_match_sn_* n_dist_no_match_sn_*

	* Fold small specialties into a broader parent group so that every
	* specialty-year cell is populated and no cell is very small.
	*   Surgery           <- Vascular Surgery, Neurological Surgery, Otolaryngology
	*   Pediatrics        <- Child Neurology, Internal Medicine/Pediatrics
	*   Internal Medicine <- Neurology
	replace nrmp_spec_desc = "Surgery" if inlist(nrmp_spec_desc, ///
		"Vascular Surgery", "Neurological Surgery", "Otolaryngology")
	replace nrmp_spec_desc = "Pediatrics" if inlist(nrmp_spec_desc, ///
		"Child Neurology", "Internal Medicine/Pediatrics")
	replace nrmp_spec_desc = "Internal Medicine" if nrmp_spec_desc == "Neurology"

	* Sum all count columns within each (aggregated) specialty and year.
	collapse (sum) n_*, by(nrmp_spec_desc year)
	
	* Go from one column per score group to one row per
	* year x specialty x score group; the column suffix becomes score_group.
	reshape long n_dist_match_sn_ n_dist_no_match_sn_, i(year nrmp_spec_desc) j(score_group)

	* Merge sparse score groups to decrease the number of small cells:
	* group 180 (scores below 180) is folded into group 190 (180-190),
	* and group 300 is folded into group 270.
	recode score_group (180 = 190) (300 = 270)

	* Re-total the counts over the merged score groups.
	collapse (sum) n_dist_match_sn_ n_dist_no_match_sn_, ///
		by(year nrmp_spec_desc score_group)

	* Attach specialty-year characteristics; keep(match) restricts the sample
	* to specialty-years present in both files.
	merge m:1 nrmp_spec_desc year using "${intermediate_data}/specialty_characteristics/nrmp_spec_characteristics_age4055.dta", keep(match) nogen 
	* Attach the RVU instrument; keep(master match) leaves the set of rows unchanged.
	* NOTE(review): none of this file's variables appear to be used below unless
	* they are named nrmp* -- confirm whether this merge is still needed.
	merge m:1 nrmp_spec_desc year using "${intermediate_data}/spec_choice/rvus_instrument_nrmp_spec.dta", keep(master match) nogen 
	ren freq N_UR 
	* Hourly income = total income / (52 * wkh).
	* presumably ptotinc is annual and wkh is hours per week -- TODO confirm.
	gen mean_hourly_income = ptotinc/(52*wkh)
	keep if year==2016
	keep nrmp* mean_hourly_income N_UR
	* The data are still at the specialty x score-group level here, but every
	* kept variable comes from the m:1 merges on specialty-year, so each specialty
	* row is repeated once per score group. Drop the exact duplicates so the
	* export holds one row per specialty.
	duplicates drop
	* drbest_docinc / drbcount are user-written commands defined outside this file;
	* presumably they apply disclosure rounding to the estimate and the count -- TODO confirm.
	drbest_docinc mean_hourly_income
	drbcount N_UR, gen(N)
	order N_UR, last 
	
	* Written tab-delimited (despite the .csv extension), using the variables' display formats.
	export delimited using "${mypath}/intermediate_csv/choice_model_mean_spec_inc.csv", replace dataf delim(tab)
